Lendo o Shapefile crime_mg:
crime_mg<- readOGR(dsn = "crime_mg", layer = "crime_mg",verbose = TRUE, use_iconv = TRUE, p4s = "+proj=longlat +ellps=WGS84")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/ricardo/Tresors/zz-pessoal/FGV/git/Trabalhos/GAEE/Tarefa 4/crime_mg", layer: "crime_mg"
## with 754 features
## It has 17 fields
## Integer64 fields read as strings: POP_RUR POP_URB POP_FEM POP_MAS
names(crime_mg)
## [1] "CODMUNI" "ID" "MUNIC" "AREA" "INDICE94" "INDICE95"
## [7] "GINI_91" "POP_94" "POP_RUR" "POP_URB" "POP_FEM" "POP_MAS"
## [13] "POP_TOT" "URBLEVEL" "PIB_PC" "X_COORD" "Y_COORD"
summary(crime_mg)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
## min max
## x -51.06258 -39.85724
## y -22.91696 -14.23725
## Is projected: FALSE
## proj4string : [+proj=longlat +ellps=WGS84]
## Data attributes:
## CODMUNI ID MUNIC
## Min. : 10 Min. : 0.0 Abadia dos Dourados: 1
## 1st Qu.:1852 1st Qu.:188.2 Abaeté : 1
## Median :3645 Median :377.5 Abre-Campo : 1
## Mean :3636 Mean :377.5 Acaiaca : 1
## 3rd Qu.:5444 3rd Qu.:566.8 Açucena : 1
## Max. :7220 Max. :755.0 Água Boa : 1
## (Other) :748
## AREA INDICE94 INDICE95 GINI_91
## Min. : 40.3 Min. : 0.260 Min. : 0.420 Min. :0.0000
## 1st Qu.: 208.9 1st Qu.: 8.143 1st Qu.: 9.643 1st Qu.:0.5129
## Median : 371.4 Median :12.060 Median :13.975 Median :0.5578
## Mean : 779.4 Mean :13.329 Mean :15.449 Mean :0.5330
## 3rd Qu.: 827.5 3rd Qu.:17.203 3rd Qu.:19.820 3rd Qu.:0.5960
## Max. :13292.1 Max. :41.300 Max. :47.690 Max. :0.7127
##
## POP_94 POP_RUR POP_URB POP_FEM POP_MAS
## Min. : 820 0 : 33 0 : 33 0 : 33 0 : 33
## 1st Qu.: 4724 1219 : 3 1374 : 3 1042 : 2 1226 : 2
## Median : 8602 1994 : 3 10429 : 2 1070 : 2 1483 : 2
## Mean : 21640 4995 : 3 1120 : 2 1368 : 2 1539 : 2
## 3rd Qu.: 18054 12472 : 2 11996 : 2 1449 : 2 1643 : 2
## Max. :2079280 1252 : 2 1257 : 2 1619 : 2 1658 : 2
## (Other):708 (Other):710 (Other):711 (Other):711
## POP_TOT URBLEVEL PIB_PC X_COORD
## Min. : 0 Min. :0.0000 Min. : 0 Min. :-50.81
## 1st Qu.: 4295 1st Qu.:0.3743 1st Qu.: 1665 1st Qu.:-45.55
## Median : 8216 Median :0.5445 Median : 2446 Median :-44.06
## Mean : 20865 Mean :0.5373 Mean : 3036 Mean :-44.22
## 3rd Qu.: 17710 3rd Qu.:0.7120 3rd Qu.: 3525 3rd Qu.:-42.76
## Max. :2020161 Max. :0.9970 Max. :37728 Max. :-40.03
##
## Y_COORD
## Min. :-22.81
## 1st Qu.:-21.19
## Median :-20.01
## Mean :-19.81
## 3rd Qu.:-18.77
## Max. :-14.46
##
Mapa de Minas Gerais com os municípios, como no shapefile, sem tema:
tmap::qtm(crime_mg,title = "Mapa de Minas Gerais")
## Linking to GEOS 3.6.1, GDAL 2.1.3, proj.4 4.9.3
Calculo de Moran’s I para verificação da auto-correlação espacial das variáveis. Aqui usamos a metodologia Rainha (queen) na matriz de vizinhança:
crime_mg_nb = poly2nb(crime_mg, queen=TRUE, row.names=crime_mg$X_COORD)
crime_mg_w <- nb2listw(crime_mg_nb, style="W")
cmg_munic <- as.numeric(crime_mg$MUNIC)
cmg_area <- as.numeric(crime_mg$AREA)
cmg_indice94 <- as.numeric(crime_mg$INDICE94)
cmg_indice95 <- as.numeric(crime_mg$INDICE95)
cmg_gini_91 <- as.numeric(crime_mg$GINI_91)
cmg_pop_94 <- as.numeric(crime_mg$POP_94)
cmg_pop_rur <- as.numeric(crime_mg$POP_RUR)
cmg_pop_urb <- as.numeric(crime_mg$POP_URB)
cmg_pop_fem <- as.numeric(crime_mg$POP_FEM)
cmg_pop_mas <- as.numeric(crime_mg$POP_MAS)
cmg_pop_tot <- as.numeric(crime_mg$POP_TOT)
cmg_urblevel <- as.numeric(crime_mg$URBLEVEL)
cmg_pib_pc <- as.numeric(crime_mg$PIB_PC)
moran_i_munic <- moran(cmg_munic,crime_mg_w, length(crime_mg_nb), Szero(crime_mg_w))
moran_i_area <- moran(cmg_area,crime_mg_w, length(crime_mg_nb), Szero(crime_mg_w))
moran_i_indice94 <- moran(cmg_indice94,crime_mg_w, length(crime_mg_nb), Szero(crime_mg_w))
moran_i_indice95 <- moran(cmg_indice95,crime_mg_w, length(crime_mg_nb), Szero(crime_mg_w))
moran_i_gini_91 <- moran(cmg_gini_91,crime_mg_w, length(crime_mg_nb), Szero(crime_mg_w))
moran_i_pop_94 <- moran(cmg_pop_94,crime_mg_w, length(crime_mg_nb), Szero(crime_mg_w))
moran_i_pop_rur <- moran(cmg_pop_rur,crime_mg_w, length(crime_mg_nb), Szero(crime_mg_w))
moran_i_pop_urb <- moran(cmg_pop_urb,crime_mg_w, length(crime_mg_nb), Szero(crime_mg_w))
moran_i_pop_fem <- moran(cmg_pop_fem,crime_mg_w, length(crime_mg_nb), Szero(crime_mg_w))
moran_i_pop_mas <- moran(cmg_pop_mas,crime_mg_w, length(crime_mg_nb), Szero(crime_mg_w))
moran_i_pop_tot <- moran(cmg_pop_tot,crime_mg_w, length(crime_mg_nb), Szero(crime_mg_w))
moran_i_urblevel <- moran(cmg_urblevel,crime_mg_w, length(crime_mg_nb), Szero(crime_mg_w))
moran_i_pib_pc <- moran(cmg_pib_pc,crime_mg_w, length(crime_mg_nb), Szero(crime_mg_w))
Mostrando todas as auto-correlações:
moran <- c("label","i")
label <- c("moran_i_munic",
"moran_i_area",
"moran_i_indice94",
"moran_i_indice95",
"moran_i_gini_91",
"moran_i_pop_94",
"moran_i_pop_rur",
"moran_i_pop_urb",
"moran_i_pop_fem",
"moran_i_pop_mas",
"moran_i_pop_tot",
"moran_i_urblevel",
"moran_i_pib_pc"
)
moran_i <- c(moran_i_munic$I
,moran_i_area$I
,moran_i_indice94$I
,moran_i_indice95$I
,moran_i_gini_91$I
,moran_i_pop_94$I
,moran_i_pop_rur$I
,moran_i_pop_urb$I
,moran_i_pop_fem$I
,moran_i_pop_mas$I
,moran_i_pop_tot$I
,moran_i_urblevel$I
,moran_i_pib_pc$I
)
moran <- data.frame(label = label, moran_i = moran_i)
moran[order(moran$moran_i,decreasing = TRUE),]
Mostrando Moran’s I das variáveis AREA, INDICE94 e INDICE95, que possuem os maiores I, e POP_URB, que possui o menor:
{
moran.plot(x = cmg_area,listw = crime_mg_w,labels = FALSE)
title("Moran's I de AREA")
moran.plot(x = cmg_indice94,listw = crime_mg_w,labels = FALSE)
title("Moran's I de INDICE94")
moran.plot(x = cmg_indice95,listw = crime_mg_w,labels = FALSE)
title("Moran's I de INDICE95")
moran.plot(x = cmg_pop_urb,listw = crime_mg_w,labels = FALSE)
title("Moran's I de POP_URB")
}
Calculando LISA
Verificando a média de links entre vizinhos:
crime_mg_nb
## Neighbour list object:
## Number of regions: 754
## Number of nonzero links: 4302
## Percentage nonzero weights: 0.7567069
## Average number of links: 5.70557
Como a média de links é 5.7, passamos este valor como parâmetro com o comando “mean(card(crime_mg_nb))” para o cálculo do LISA para as variáveis AREA - INDICE94 - INDICE95 - POP_URB:
LISA_AREA <- lisa(x = crime_mg$X_COORD, y = crime_mg$Y_COORD, z = crime_mg$AREA, neigh = mean(card(crime_mg_nb)))
LISA_INDICE94 <- lisa(x = crime_mg$X_COORD, y = crime_mg$Y_COORD, z = crime_mg$INDICE94, neigh = mean(card(crime_mg_nb)))
LISA_INDICE95 <- lisa(x = crime_mg$X_COORD, y = crime_mg$Y_COORD, z = crime_mg$INDICE95, neigh = mean(card(crime_mg_nb)))
LISA_POP_URB <- lisa(x = crime_mg$X_COORD, y = crime_mg$Y_COORD, z = as.numeric(crime_mg$POP_URB), neigh = mean(card(crime_mg_nb)))
Plot dos mapas LISA:
crime_mg$LISA_AREA_p <- LISA_AREA$p
crime_mg$LISA_INDICE94_p <- LISA_INDICE94$p
crime_mg$LISA_INDICE95_p <- LISA_INDICE95$p
crime_mg$LISA_POP_URB_p <- LISA_POP_URB$p
crime_mg$LISA_AREA_corr <- LISA_AREA$correlation
crime_mg$LISA_INDICE94_corr <- LISA_INDICE94$correlation
crime_mg$LISA_INDICE95_corr <- LISA_INDICE95$correlation
crime_mg$LISA_POP_URB_corr <- LISA_POP_URB$correlation
tmap::tm_shape(crime_mg, simplify = 1) +
tmap::tm_polygons() +
tmap::tm_shape(crime_mg, simplify = 1) +
tmap::tm_fill(c("LISA_AREA_p","LISA_AREA_corr"), midpoint = 0) +
tmap::tm_style("natural")
tmap::tm_shape(crime_mg) +
tmap::tm_polygons() +
tmap::tm_shape(crime_mg, simplify = 1) +
tmap::tm_fill(c("LISA_INDICE94_p","LISA_INDICE94_corr"), midpoint = 0) +
tmap::tm_style("natural")
tmap::tm_shape(crime_mg) +
tmap::tm_polygons() +
tmap::tm_shape(crime_mg, simplify = 1) +
tmap::tm_fill(c("LISA_INDICE95_p","LISA_INDICE95_corr"), midpoint = 0) +
tmap::tm_style("natural")
tmap::tm_shape(crime_mg) +
tmap::tm_polygons() +
tmap::tm_shape(crime_mg, simplify = 1) +
tmap::tm_fill(c("LISA_POP_URB_p","LISA_POP_URB_corr"), midpoint = 0) +
tmap::tm_style("natural")
Fizemos também a implementação dos mapas LISA no GeoDa e estes foram diferentes do apresentado aqui. Abaixo, os gráficos gerados no GeoDa:
knitr::include_graphics("MoransI-Geoda-Area.png")
knitr::include_graphics("LISA Significance - Geoda - Area.png")
knitr::include_graphics("LISA Cluster - Geoda - Area.png")
knitr::include_graphics("MoransI-Geoda-INDICE94.png")
knitr::include_graphics("LISA Significance - Geoda - INDICE94.png")
knitr::include_graphics("LISA Cluster - Geoda - INDICE94.png")
knitr::include_graphics("MoransI-Geoda-INDICE95.png")
knitr::include_graphics("LISA Significance - Geoda - INDICE95.png")
knitr::include_graphics("LISA Cluster - Geoda - INDICE95.png")
knitr::include_graphics("MoransI-Geoda-POP_URB.png")
knitr::include_graphics("LISA Significance - Geoda - POP_URB.png")
knitr::include_graphics("LISA Cluster - Geoda - POP_URB.png")
Regressão linear simples:
ap <- as.data.frame(cbind(crime_mg$MUNIC, crime_mg$AREA, crime_mg$INDICE94,
crime_mg$INDICE95, crime_mg$GINI_91, crime_mg$POP_94,
crime_mg$POP_RUR, crime_mg$POP_URB, crime_mg$POP_FEM,
crime_mg$POP_MAS, crime_mg$POP_TOT, crime_mg$URBLEVEL,
crime_mg$PIB_PC))
colnames(ap) <- c("ID",names(crime_mg@data[4:15]))
head(ap)
lmK <- lm(formula = crime_mg$INDICE95 ~ URBLEVEL, data = ap)
summary(lmK)
##
## Call:
## lm(formula = crime_mg$INDICE95 ~ URBLEVEL, data = ap)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.873 -4.664 -1.174 3.639 37.569
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.3208 0.6251 10.11 <2e-16 ***
## URBLEVEL 16.9877 1.0667 15.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.845 on 752 degrees of freedom
## Multiple R-squared: 0.2522, Adjusted R-squared: 0.2512
## F-statistic: 253.6 on 1 and 752 DF, p-value: < 2.2e-16
Regressao linear espacial - SAR
ap <- as.data.frame(cbind(crime_mg$MUNIC, crime_mg$AREA, crime_mg$INDICE94,
crime_mg$INDICE95, crime_mg$GINI_91, crime_mg$POP_94,
crime_mg$POP_RUR, crime_mg$POP_URB, crime_mg$POP_FEM,
crime_mg$POP_MAS, crime_mg$POP_TOT, crime_mg$URBLEVEL,
crime_mg$PIB_PC))
colnames(ap) <- c("ID",names(crime_mg@data[4:15]))
crime_mg_nb = poly2nb(crime_mg, queen=TRUE, row.names=crime_mg$X_COORD)
crime_mg_w <- nb2listw(crime_mg_nb, style="W")
sar.ap <- lagsarlm(crime_mg$INDICE95 ~ URBLEVEL,data=ap,crime_mg_w,method="eigen")
SARk_SSE <- sar.ap$SSE
SST <- sum((ap$INDICE95 - mean(ap$INDICE95))^2)
r2_SARk <- 1 - (SARk_SSE/SST)
r2_SARk
## [1] 0.3314096
summary(sar.ap)
##
## Call:
## lagsarlm(formula = crime_mg$INDICE95 ~ URBLEVEL, data = ap, listw = crime_mg_w,
## method = "eigen")
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.2482 -4.2371 -1.0771 3.3952 33.9250
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.35307 0.79725 2.9515 0.003163
## URBLEVEL 13.93704 1.04857 13.2914 < 2.2e-16
##
## Rho: 0.35437, LR test value: 66.163, p-value: 4.4409e-16
## Asymptotic standard error: 0.044457
## z-value: 7.9712, p-value: 1.5543e-15
## Wald statistic: 63.539, p-value: 1.5543e-15
##
## Log likelihood: -2486.1 for lag model
## ML residual variance (sigma squared): 41.776, (sigma: 6.4634)
## Number of observations: 754
## Number of parameters estimated: 4
## AIC: 4980.2, (AIC for lm: 5044.4)
## LM test for residual autocorrelation
## test value: 10.579, p-value: 0.001144
cat("Rˆ2 SAR: ",r2_SARk)
## Rˆ2 SAR: 0.3314096
cat("Rˆ2 LM:",summary(lmK)$adj.r.squared)
## Rˆ2 LM: 0.2511925
O modelo espacial SAR apresentou ganho de 8% versus o modelo linear simples.
Regressão linear simples:
ap <- as.data.frame(cbind(crime_mg$MUNIC, crime_mg$AREA, crime_mg$INDICE94,
crime_mg$INDICE95, crime_mg$GINI_91, crime_mg$POP_94,
crime_mg$POP_RUR, crime_mg$POP_URB, crime_mg$POP_FEM,
crime_mg$POP_MAS, crime_mg$POP_TOT, crime_mg$URBLEVEL,
crime_mg$PIB_PC))
colnames(ap) <- c("ID",names(crime_mg@data[4:15]))
head(ap)
lmK <- lm(formula = crime_mg$INDICE95 ~ URBLEVEL, data = ap)
summary(lmK)
##
## Call:
## lm(formula = crime_mg$INDICE95 ~ URBLEVEL, data = ap)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.873 -4.664 -1.174 3.639 37.569
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.3208 0.6251 10.11 <2e-16 ***
## URBLEVEL 16.9877 1.0667 15.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.845 on 752 degrees of freedom
## Multiple R-squared: 0.2522, Adjusted R-squared: 0.2512
## F-statistic: 253.6 on 1 and 752 DF, p-value: < 2.2e-16
Regressão linear GWR:
coords <- cbind(crime_mg$X_COORD,crime_mg$Y_COORD)
colnames(coords) <- c("X","Y")
ap <- as.data.frame(cbind(crime_mg$MUNIC, crime_mg$AREA, crime_mg$INDICE94,
crime_mg$INDICE95, crime_mg$GINI_91, crime_mg$POP_94,
crime_mg$POP_RUR, crime_mg$POP_URB, crime_mg$POP_FEM,
crime_mg$POP_MAS, crime_mg$POP_TOT, crime_mg$URBLEVEL,
crime_mg$PIB_PC))
colnames(ap) <- c("ID",names(crime_mg@data[4:15]))
bwGauss <- gwr.sel(crime_mg$INDICE95 ~ URBLEVEL,data=ap,coords=coords,adapt=TRUE,method="aic",
gweight=gwr.Gauss,verbose=FALSE)
gwr.ap <- gwr(crime_mg$INDICE95 ~ URBLEVEL, data=ap,coords=coords,bandwidth=bwGauss,
gweight=gwr.Gauss,adapt=bwGauss,hatmatrix=TRUE)
gwr.ap
## Call:
## gwr(formula = crime_mg$INDICE95 ~ URBLEVEL, data = ap, coords = coords,
## bandwidth = bwGauss, gweight = gwr.Gauss, adapt = bwGauss,
## hatmatrix = TRUE)
## Kernel function: gwr.Gauss
## Adaptive quantile: 0.013237 (about 9 of 754 data points)
## Summary of GWR coefficient estimates at data points:
## Min. 1st Qu. Median 3rd Qu. Max. Global
## X.Intercept. -4.5982 4.2098 6.4983 9.3427 29.7712 6.3208
## URBLEVEL -8.3511 10.0699 15.4027 20.0811 39.7302 16.9877
## Number of data points: 754
## Effective number of parameters (residual: 2traceS - traceS'S): 102.7988
## Effective degrees of freedom (residual: 2traceS - traceS'S): 651.2012
## Sigma (residual: 2traceS - traceS'S): 6.127271
## Effective number of parameters (model: traceS): 71.52246
## Effective degrees of freedom (model: traceS): 682.4775
## Sigma (model: traceS): 5.985225
## Sigma (ML): 5.694282
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 4923.585
## AIC (GWR p. 96, eq. 4.22): 4834.391
## Residual sum of squares: 24448.34
## Quasi-global R2: 0.4810664
GWR_SSE <- gwr.ap$results$rss
r2_GWR <- 1 - (GWR_SSE/SST)
r2_GWR
## [1] 0.4810664
cat("Coeficientes LM:",summary(lmK)$coefficients, "\n")
## Coeficientes LM: 6.320829 16.98768 0.6250511 1.066745 10.1125 15.92479 1.258985e-22 2.029188e-49
cat("Rˆ2 LM:",summary(lmK)$adj.r.squared, "\n")
## Rˆ2 LM: 0.2511925
cat("Coeficientes GWR:",summary(gwr.ap$lm$coefficients), "\n")
## Coeficientes GWR: 6.320829 8.987542 11.65426 11.65426 14.32097 16.98768
cat("Rˆ2 GWR:",r2_GWR, "\n")
## Rˆ2 GWR: 0.4810664
cat("Rˆ2 LM:",summary(lmK)$adj.r.squared, "\n")
## Rˆ2 LM: 0.2511925
cat("Rˆ2 SAR: ",r2_SARk, "\n")
## Rˆ2 SAR: 0.3314096
cat("Rˆ2 GWR: ",r2_GWR, "\n")
## Rˆ2 GWR: 0.4810664
Sim, GWR Aumenta 15% o ganho em relação ao SAR, que já era 8% maior que o modelo simples.
Implementação do modelo multivariado stepwise - regressão simples:
ap <- as.data.frame(cbind(crime_mg$MUNIC, crime_mg$AREA, crime_mg$INDICE94,
crime_mg$INDICE95, crime_mg$GINI_91, crime_mg$POP_94,
crime_mg$POP_RUR, crime_mg$POP_URB, crime_mg$POP_FEM,
crime_mg$POP_MAS, crime_mg$POP_TOT, crime_mg$URBLEVEL,
crime_mg$PIB_PC))
colnames(ap) <- c("ID",names(crime_mg@data[4:15]))
lm.ap <- step(lm(crime_mg$INDICE95 ~ ., data=ap))
## Start: AIC=2234.09
## crime_mg$INDICE95 ~ ID + AREA + INDICE94 + GINI_91 + POP_94 +
## POP_RUR + POP_URB + POP_FEM + POP_MAS + POP_TOT + URBLEVEL +
## PIB_PC
##
## Df Sum of Sq RSS AIC
## - AREA 1 3.8 14103 2232.3
## - ID 1 6.2 14106 2232.4
## - POP_MAS 1 10.7 14110 2232.7
## - POP_FEM 1 14.5 14114 2232.9
## - POP_RUR 1 15.7 14115 2232.9
## - POP_URB 1 18.3 14118 2233.1
## - POP_TOT 1 18.3 14118 2233.1
## - POP_94 1 18.6 14118 2233.1
## <none> 14100 2234.1
## - PIB_PC 1 41.8 14141 2234.3
## - GINI_91 1 232.1 14332 2244.4
## - URBLEVEL 1 480.7 14580 2257.4
## - INDICE94 1 18251.9 32351 2858.3
##
## Step: AIC=2232.29
## crime_mg$INDICE95 ~ ID + INDICE94 + GINI_91 + POP_94 + POP_RUR +
## POP_URB + POP_FEM + POP_MAS + POP_TOT + URBLEVEL + PIB_PC
##
## Df Sum of Sq RSS AIC
## - ID 1 6.4 14110 2230.6
## - POP_MAS 1 11.7 14115 2230.9
## - POP_FEM 1 15.3 14119 2231.1
## - POP_RUR 1 16.6 14120 2231.2
## - POP_URB 1 18.5 14122 2231.3
## - POP_TOT 1 20.5 14124 2231.4
## - POP_94 1 20.7 14124 2231.4
## <none> 14103 2232.3
## - PIB_PC 1 41.4 14145 2232.5
## - GINI_91 1 231.5 14335 2242.6
## - URBLEVEL 1 477.9 14581 2255.4
## - INDICE94 1 18434.4 32538 2860.6
##
## Step: AIC=2230.64
## crime_mg$INDICE95 ~ INDICE94 + GINI_91 + POP_94 + POP_RUR + POP_URB +
## POP_FEM + POP_MAS + POP_TOT + URBLEVEL + PIB_PC
##
## Df Sum of Sq RSS AIC
## - POP_MAS 1 12.7 14122 2229.3
## - POP_RUR 1 15.9 14126 2229.5
## - POP_FEM 1 16.2 14126 2229.5
## - POP_URB 1 18.4 14128 2229.6
## - POP_TOT 1 19.5 14129 2229.7
## - POP_94 1 19.7 14129 2229.7
## <none> 14110 2230.6
## - PIB_PC 1 41.1 14151 2230.8
## - GINI_91 1 228.9 14339 2240.8
## - URBLEVEL 1 474.5 14584 2253.6
## - INDICE94 1 18464.5 32574 2859.5
##
## Step: AIC=2229.31
## crime_mg$INDICE95 ~ INDICE94 + GINI_91 + POP_94 + POP_RUR + POP_URB +
## POP_FEM + POP_TOT + URBLEVEL + PIB_PC
##
## Df Sum of Sq RSS AIC
## - POP_FEM 1 3.5 14126 2227.5
## - POP_RUR 1 13.9 14136 2228.1
## - POP_TOT 1 19.2 14142 2228.3
## - POP_URB 1 19.4 14142 2228.3
## - POP_94 1 19.6 14142 2228.4
## <none> 14122 2229.3
## - PIB_PC 1 40.7 14163 2229.5
## - GINI_91 1 230.6 14353 2239.5
## - URBLEVEL 1 474.7 14597 2252.2
## - INDICE94 1 18477.9 32600 2858.1
##
## Step: AIC=2227.5
## crime_mg$INDICE95 ~ INDICE94 + GINI_91 + POP_94 + POP_RUR + POP_URB +
## POP_TOT + URBLEVEL + PIB_PC
##
## Df Sum of Sq RSS AIC
## - POP_URB 1 16.2 14142 2226.4
## - POP_TOT 1 18.9 14145 2226.5
## - POP_94 1 19.3 14145 2226.5
## - POP_RUR 1 21.3 14147 2226.6
## <none> 14126 2227.5
## - PIB_PC 1 39.0 14165 2227.6
## - GINI_91 1 227.1 14353 2237.5
## - URBLEVEL 1 476.7 14603 2250.5
## - INDICE94 1 18547.7 32674 2857.8
##
## Step: AIC=2226.37
## crime_mg$INDICE95 ~ INDICE94 + GINI_91 + POP_94 + POP_RUR + POP_TOT +
## URBLEVEL + PIB_PC
##
## Df Sum of Sq RSS AIC
## - POP_TOT 1 18.2 14160 2225.3
## - POP_94 1 18.7 14161 2225.4
## - POP_RUR 1 21.7 14164 2225.5
## <none> 14142 2226.4
## - PIB_PC 1 39.4 14181 2226.5
## - GINI_91 1 263.6 14406 2238.3
## - URBLEVEL 1 465.1 14607 2248.8
## - INDICE94 1 18809.7 32952 2862.2
##
## Step: AIC=2225.34
## crime_mg$INDICE95 ~ INDICE94 + GINI_91 + POP_94 + POP_RUR + URBLEVEL +
## PIB_PC
##
## Df Sum of Sq RSS AIC
## - POP_94 1 2.1 14162 2223.4
## - POP_RUR 1 21.6 14182 2224.5
## - PIB_PC 1 37.4 14198 2225.3
## <none> 14160 2225.3
## - GINI_91 1 257.1 14417 2236.9
## - URBLEVEL 1 448.5 14609 2246.8
## - INDICE94 1 18805.2 32966 2860.5
##
## Step: AIC=2223.45
## crime_mg$INDICE95 ~ INDICE94 + GINI_91 + POP_RUR + URBLEVEL +
## PIB_PC
##
## Df Sum of Sq RSS AIC
## - POP_RUR 1 21.0 14183 2222.6
## - PIB_PC 1 36.1 14199 2223.4
## <none> 14162 2223.4
## - GINI_91 1 255.2 14418 2234.9
## - URBLEVEL 1 447.1 14610 2244.9
## - INDICE94 1 19048.5 33211 2864.1
##
## Step: AIC=2222.57
## crime_mg$INDICE95 ~ INDICE94 + GINI_91 + URBLEVEL + PIB_PC
##
## Df Sum of Sq RSS AIC
## - PIB_PC 1 36.5 14220 2222.5
## <none> 14183 2222.6
## - GINI_91 1 234.5 14418 2232.9
## - URBLEVEL 1 454.0 14637 2244.3
## - INDICE94 1 19027.5 33211 2862.1
##
## Step: AIC=2222.5
## crime_mg$INDICE95 ~ INDICE94 + GINI_91 + URBLEVEL
##
## Df Sum of Sq RSS AIC
## <none> 14220 2222.5
## - GINI_91 1 252.2 14472 2233.8
## - URBLEVEL 1 550.7 14771 2249.2
## - INDICE94 1 19206.1 33426 2864.9
lm.ap
##
## Call:
## lm(formula = crime_mg$INDICE95 ~ INDICE94 + GINI_91 + URBLEVEL,
## data = ap)
##
## Coefficients:
## (Intercept) INDICE94 GINI_91 URBLEVEL
## 4.6200 0.8295 -5.8052 5.3346
summary(lm.ap)
##
## Call:
## lm(formula = crime_mg$INDICE95 ~ INDICE94 + GINI_91 + URBLEVEL,
## data = ap)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.6372 -2.4362 -0.2178 2.3819 29.1584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.62004 0.72718 6.353 3.65e-10 ***
## INDICE94 0.82950 0.02606 31.827 < 2e-16 ***
## GINI_91 -5.80519 1.59168 -3.647 0.000283 ***
## URBLEVEL 5.33462 0.98980 5.390 9.47e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.354 on 750 degrees of freedom
## Multiple R-squared: 0.6982, Adjusted R-squared: 0.697
## F-statistic: 578.3 on 3 and 750 DF, p-value: < 2.2e-16
lmKmv <- lm(formula = crime_mg$INDICE95 ~ INDICE94 + GINI_91 + URBLEVEL,
data = ap)
summary(lmKmv)
##
## Call:
## lm(formula = crime_mg$INDICE95 ~ INDICE94 + GINI_91 + URBLEVEL,
## data = ap)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.6372 -2.4362 -0.2178 2.3819 29.1584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.62004 0.72718 6.353 3.65e-10 ***
## INDICE94 0.82950 0.02606 31.827 < 2e-16 ***
## GINI_91 -5.80519 1.59168 -3.647 0.000283 ***
## URBLEVEL 5.33462 0.98980 5.390 9.47e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.354 on 750 degrees of freedom
## Multiple R-squared: 0.6982, Adjusted R-squared: 0.697
## F-statistic: 578.3 on 3 and 750 DF, p-value: < 2.2e-16
Implementação do modelo multivariado stepwise - regressão SAR
ap <- as.data.frame(cbind(crime_mg$MUNIC, crime_mg$AREA, crime_mg$INDICE94,
crime_mg$INDICE95, crime_mg$GINI_91, crime_mg$POP_94,
crime_mg$POP_RUR, crime_mg$POP_URB, crime_mg$POP_FEM,
crime_mg$POP_MAS, crime_mg$POP_TOT, crime_mg$URBLEVEL,
crime_mg$PIB_PC))
colnames(ap) <- c("ID",names(crime_mg@data[4:15]))
sarmv.ap <- lagsarlm(crime_mg$INDICE95 ~ INDICE94 + GINI_91 +
URBLEVEL,data=ap,crime_mg_w,method="eigen")
SARkmv_SSE <- sarmv.ap$SSE
SST <- sum((ap$INDICE95 - mean(ap$INDICE95))^2)
r2_SARkmv <- 1 - (SARkmv_SSE/SST)
r2_SARkmv
## [1] 0.7029335
summary(sarmv.ap)
##
## Call:
## lagsarlm(formula = crime_mg$INDICE95 ~ INDICE94 + GINI_91 + URBLEVEL,
## data = ap, listw = crime_mg_w, method = "eigen")
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.43661 -2.38664 -0.19365 2.28926 28.43605
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.36490 0.82039 4.1016 4.103e-05
## INDICE94 0.80411 0.02706 29.7163 < 2.2e-16
## GINI_91 -5.30895 1.58190 -3.3561 0.0007906
## URBLEVEL 4.67297 0.99603 4.6916 2.711e-06
##
## Rho: 0.10647, LR test value: 10.464, p-value: 0.0012169
## Asymptotic standard error: 0.033258
## z-value: 3.2013, p-value: 0.0013679
## Wald statistic: 10.249, p-value: 0.0013679
##
## Log likelihood: -2171.899 for lag model
## ML residual variance (sigma squared): 18.562, (sigma: 4.3083)
## Number of observations: 754
## Number of parameters estimated: 6
## AIC: 4355.8, (AIC for lm: 4364.3)
## LM test for residual autocorrelation
## test value: 0.10236, p-value: 0.74902
Comparação dos modelos Multi-variados de SAR e LM:
cat("Rˆ2 SAR: ",r2_SARk, "\n")
## Rˆ2 SAR: 0.3314096
cat("Rˆ2 LM:",summary(lmK)$adj.r.squared,"\n")
## Rˆ2 LM: 0.2511925
cat("Rˆ2 SAR MV: ",r2_SARkmv, "\n")
## Rˆ2 SAR MV: 0.7029335
cat("Rˆ2 LM MV:",summary(lmKmv)$adj.r.squared,"\n")
## Rˆ2 LM MV: 0.6969645
No modelo multivariado o ganho é de 1% sobre o modelo linear simples para o modelo SAR, porém o ganho foi de +37% com relação ao modelo original univariado.
Implementação do modelo multivariado stepwise - regressão GWR
bwGaussMV <- gwr.sel(crime_mg$INDICE95 ~ INDICE94 + GINI_91 + URBLEVEL,data=ap,coords=coords,adapt=TRUE,method="aic",
gweight=gwr.Gauss,verbose=FALSE)
gwrMV.ap <- gwr(crime_mg$INDICE95 ~ INDICE94 + GINI_91 +
URBLEVEL, data=ap,coords=coords,bandwidth=bwGauss,
gweight=gwr.Gauss,adapt=bwGaussMV,hatmatrix=TRUE)
gwrMV.ap
## Call:
## gwr(formula = crime_mg$INDICE95 ~ INDICE94 + GINI_91 + URBLEVEL,
## data = ap, coords = coords, bandwidth = bwGauss, gweight = gwr.Gauss,
## adapt = bwGaussMV, hatmatrix = TRUE)
## Kernel function: gwr.Gauss
## Adaptive quantile: 0.0411007 (about 30 of 754 data points)
## Summary of GWR coefficient estimates at data points:
## Min. 1st Qu. Median 3rd Qu. Max. Global
## X.Intercept. -2.68291 2.58872 3.82700 8.52713 20.59749 4.6200
## INDICE94 0.62641 0.75768 0.80116 0.83534 1.02374 0.8295
## GINI_91 -27.22302 -11.11227 -5.72057 -0.53447 13.64553 -5.8052
## URBLEVEL -2.24401 2.25196 5.03699 8.09010 12.19130 5.3346
## Number of data points: 754
## Effective number of parameters (residual: 2traceS - traceS'S): 66.21814
## Effective degrees of freedom (residual: 2traceS - traceS'S): 687.7819
## Sigma (residual: 2traceS - traceS'S): 4.135734
## Effective number of parameters (model: traceS): 46.62188
## Effective degrees of freedom (model: traceS): 707.3781
## Sigma (model: traceS): 4.078046
## Sigma (ML): 3.949956
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 4313.115
## AIC (GWR p. 96, eq. 4.22): 4257.927
## Residual sum of squares: 11764.02
## Quasi-global R2: 0.7503001
SST <- sum((ap$INDICE95 - mean(ap$INDICE95))^2)
GWR_MV_SSE <- gwrMV.ap$results$rss
r2_GWR_MV <- 1 - (GWR_MV_SSE/SST)
r2_GWR_MV
## [1] 0.7503001
Comparando os resultados finais:
cat("Rˆ2 GWR: ",r2_GWR, "\n")
## Rˆ2 GWR: 0.4810664
cat("Rˆ2 SAR: ",r2_SARk, "\n")
## Rˆ2 SAR: 0.3314096
cat("Rˆ2 LM:",summary(lmK)$adj.r.squared,"\n")
## Rˆ2 LM: 0.2511925
cat("Rˆ2 GWR MV: ",r2_GWR_MV, "\n")
## Rˆ2 GWR MV: 0.7503001
cat("Rˆ2 SAR MV: ",r2_SARkmv, "\n")
## Rˆ2 SAR MV: 0.7029335
cat("Rˆ2 LM MV:",summary(lmKmv)$adj.r.squared,"\n")
## Rˆ2 LM MV: 0.6969645
Notamos que o modelo GWR tambem é beneficiado por uma análise multivariada, tendo aumentado 27%, passando de 48.1% para 75%.
ani_map <- tmap::tm_shape(crime_mg, simplify = 1) +
tmap::tm_fill() +
tmap::tm_shape(crime_mg) +
tmap::tm_fill(c("INDICE94","INDICE95")) +
tmap::tm_style(style = "natural", legend.outside = TRUE) +
tmap::tm_facets(free.scales.symbol.size = FALSE, nrow=1,ncol=1) +
tmap::tm_layout(main.title = "Evolucao do Indice de Criminalidade de 94-95") +
tmap::tm_polygons()
tmap_animation(ani_map, loop = TRUE, delay=200, filename = "CRIME_MG.gif")
knitr::include_graphics("CRIME_MG.gif")